library(httr)
library(lubridate)
library(ggplot2)
library(gganimate)
library(ggrepel)
library(patchwork)
library(data.table)
library(broom)
library(rgdal)
require(maptools)
require(rgeos)
options(scipen=2)
tf <- "~/covid/global.csv"
# GET("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv", authenticate(":", ":", type="ntlm"), write_disk(tf, overwrite = TRUE))
globcases <- fread(tf)
setnames(globcases, c('dateRep', 'day', 'month', 'year', 'new_Cases', 'new_Deaths', 'Country', 'geoId', 'countryCode', 'population', 'continent', 'roll_norm_Cases'))
globcases[, Date := as.Date(dmy(dateRep))]
globcases <- globcases[, .(Country, countryCode, continent, population, Date, new_Cases, new_Deaths)]
# str(globcases)
## Create year-week field, to enable smoother merging with the other tables, as they can have NA dates.
# Start with separate fields, in case I need to add tweaks to them.
globcases[, year := strftime(globcases$Date, format='%Y')]
globcases[, week := as.integer(strftime(globcases$Date, format='%V'))]
globcases[, year_week := paste0(year, '-W', week)]
globcases[, year := NULL]
globcases[, week := NULL]
# Remove the cruise ship entry
globcases <- globcases[Country != 'Cases_on_an_international_conveyance_Japan', ]
# Some countries lag by a day in entering their data. So if I use the latest date I miss some countries,
# if I use the latest available entry, the countries are out of sync. Determine range, and as long as it's less than a couple days, take the latest common date as the "current" date.
end_dates <- globcases[, max(Date), by=Country]
if ( min(unique(end_dates$V1)) - today() >= -3) { # allow for lack of updates over weekends
message('Entries for some countries lag by a couple days. Continuing with last common date.')
current <- min(unique(end_dates$V1))
} else {
message('Entries for some countries seem to have stopped. Continuing with latest available entry date.')
current <- max(unique(end_dates$V1))
}
## Entries for some countries lag by a couple days. Continuing with last common date.
message(paste('Last entry date used:', current, '.'))
## Last entry date used: 2020-11-14 .
tf <- "~/covid/testing.csv"
# GET("https://opendata.ecdc.europa.eu/covid19/testing/csv", authenticate(":", ":", type="ntlm"), write_disk(tf, overwrite = TRUE))
tests <- fread(tf)
setnames(tests, c('Country', 'countryCode', 'year_week', 'test_Cases', 'new_Tests', 'population', 'test_Rate', 'pos_Rate', 'source'))
tests <- tests[, .(Country, year_week, new_Tests)]
# str(tests)
## The UK has been mistyped in some entries, so that needs fixing to enable better correct merging of the tables.
tests[Country=='United Kingdom', Country := 'United_Kingdom']
## Fix inconsistent formatting that is messing up the mergers
tests[, year_week:= sub('W0', 'W', year_week)]
tf <- "~/covid/hospitalizations.csv"
# GET("https://opendata.ecdc.europa.eu/covid19/hospitalicuadmissionrates/csv", authenticate(":", ":", type="ntlm"), write_disk(tf, overwrite = TRUE))
icu <- fread(tf)
setnames(icu, c('Country', 'Type', 'Date', 'year_week', 'Value', 'source', 'URL'))
## The UK has been mistyped in some entries, so that needs fixing to enable better correct merging of the tables.
icu[Country=='United Kingdom', Country := 'United_Kingdom']
icu[, Date := as.Date(Date)]
## Fix inconsistent formatting that is messing up the mergers
icu[, year_week:= sub('W0', 'W', year_week)]
## Split daily and weekly metrics that are wreaking havoc on my mergers.
occup <- unique(icu[grepl('occupancy', Type, fixed=TRUE), .(Country, Date, Type, Value)])
occup <- dcast(occup, Country + Date ~ Type, value.var = 'Value')
setnames(occup, c('Country', 'Date', 'occup_ICU', 'occup_Hosp'))
admit <- unique(icu[grepl('admissions', Type, fixed=TRUE), .(Country, year_week, Type, Value)])
admit <- dcast(admit, Country + year_week ~ Type, value.var = 'Value')
setnames(admit, c('Country', 'year_week', 'norm_new_ICU', 'norm_new_Hosp'))
admit[, norm_new_ICU := norm_new_ICU * 10 ] # re-normalize to 1M instead of 100K, to be same as my other norms
admit[, norm_new_Hosp := norm_new_Hosp * 10 ]
# str(icu)
# Marry the tables
DT <- merge(merge(globcases, occup, by=c('Country', 'Date'), all=TRUE),
merge(admit, tests, by=c('Country', 'year_week'), all=TRUE),
by=c('Country', 'year_week'), all=TRUE)
# View(merge(globcases, occup, by=c('Country', 'Date'), all=TRUE)[Country=='Denmark',])
# View(merge(admit, tests, by=c('Country', 'year_week'), all=TRUE)[Country=='Denmark',])
# View(DT[Country=='Denmark',])
# str(DT)
## Some country codes are NA in some rows. Try to fill in the missing values.
DT[, countryCode := .SD$countryCode[!is.na(.SD$countryCode)][1], by=Country]
# Sort newest last, for cumulative metrics.
setorder(DT, Country, Date)
## Missing days break the cumulative sums
DT[is.na(new_Cases), new_Cases := 0]
DT[is.na(new_Deaths), new_Deaths := 0]
DT[is.na(new_Tests), new_Tests := 0]
DT[is.na(norm_new_Hosp), norm_new_Hosp := 0]
DT[is.na(norm_new_ICU), norm_new_ICU := 0]
# Total Cases and Deaths
DT[, total_Cases := cumsum(new_Cases), by=Country]
DT[, total_Deaths := cumsum(new_Deaths), by=Country]
DT[, total_Tests := cumsum(new_Tests), by=Country]
# Sliding window cumulative cases in a W-days window.
W <- 7
DT[, roll_Cases := frollsum(new_Cases, n=W, align='right', na.rm=TRUE), by=Country]
DT[, roll_Deaths := frollsum(new_Deaths, n=W, align='right', na.rm=TRUE), by=Country]
DT[, roll_Tests := frollsum(new_Tests, n=W, align='right', na.rm=TRUE), by=Country]
# Population normalisations to P citizens
P <- 1e6
DT[, norm_new_Cases := new_Cases / population * P, by=Country]
DT[, norm_new_Deaths := new_Deaths / population * P, by=Country]
DT[, norm_new_Tests := new_Tests / population * P, by=Country]
# DT[is.na(norm_new_Cases), norm_new_Cases := 0]
# DT[is.na(norm_new_Deaths), norm_new_Deaths := 0]
# DT[is.na(norm_new_Tests), norm_new_Tests := 0]
DT[, norm_tot_Cases := total_Cases / population * P, by=Country]
DT[, norm_tot_Deaths := total_Deaths / population * P, by=Country]
DT[, norm_tot_Tests := total_Tests / population * P, by=Country]
DT[, norm_tot_ICU := cumsum(norm_new_ICU), by=Country]
DT[, norm_tot_Hosp := cumsum(norm_new_Hosp), by=Country]
# DT[is.na(norm_tot_Cases), norm_tot_Cases := 0]
# DT[is.na(norm_tot_Deaths), norm_tot_Deaths := 0]
# DT[is.na(norm_tot_Tests), norm_tot_Tests := 0]
# DT[is.na(norm_tot_ICU), norm_tot_ICU := 0]
# DT[is.na(norm_tot_Hosp), norm_tot_Hosp := 0]
DT[, norm_roll_Cases := roll_Cases / population * P, by=Country]
DT[, norm_roll_Deaths := roll_Deaths / population * P, by=Country]
DT[, norm_roll_Tests := roll_Tests / population * P, by=Country]
DT[, norm_roll_ICU := frollsum(norm_new_ICU, n=W, align='right', na.rm=TRUE), by=Country]
DT[, norm_roll_Hosp := frollsum(norm_new_Hosp, n=W, align='right', na.rm=TRUE), by=Country]
# Severity rates
DT[, Mortality := total_Deaths / total_Cases]
DT[, roll_TestCase_Rate := norm_roll_Cases / norm_roll_Tests]
DT[, roll_HospICU_Rate := norm_roll_ICU / norm_roll_Hosp]
DT[, roll_ICUDeath_Rate := norm_roll_Deaths / norm_roll_ICU]
# Global
DT[, gNew_Cases := sum(new_Cases, na.rm=TRUE), by=Date]
DT[, gNew_Deaths := sum(new_Deaths, na.rm=TRUE), by=Date]
DT[, gNew_Tests := sum(new_Tests, na.rm=TRUE), by=Date]
DT[, gTotal_Cases := sum(total_Cases, na.rm=TRUE), by=Date]
DT[, gTotal_Deaths := sum(total_Deaths, na.rm=TRUE), by=Date]
DT[, gTotal_Tests := sum(total_Tests, na.rm=TRUE), by=Date]
DT[, gRoll_Cases := sum(roll_Cases, na.rm=TRUE), by=Date]
DT[, gRoll_Deaths := sum(roll_Deaths, na.rm=TRUE), by=Date]
DT[, gRoll_Tests := sum(roll_Tests, na.rm=TRUE), by=Date]
DT[, gMortality := gTotal_Deaths / gTotal_Cases]
# Rates of daily change
DT[, roll_Cases_Rate := frollapply(roll_Cases, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, roll_Deaths_Rate := frollapply(roll_Deaths, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, roll_Tests_Rate := frollapply(norm_roll_Tests, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, roll_Hosp_Rate := frollapply(norm_roll_Hosp, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, roll_ICU_Rate := frollapply(norm_roll_ICU, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[!is.finite(roll_Cases_Rate), roll_Cases_Rate := 0]
DT[!is.finite(roll_Deaths_Rate), roll_Deaths_Rate := 0]
DT[!is.finite(roll_Tests_Rate), roll_Tests_Rate := 0]
DT[!is.finite(roll_Hosp_Rate), roll_Hosp_Rate := 0]
DT[!is.finite(roll_ICU_Rate), roll_ICU_Rate := 0]
DT[, gRoll_Cases_Rate := frollapply(gRoll_Cases, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, gRoll_Deaths_Rate := frollapply(gRoll_Deaths, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[!is.finite(gRoll_Cases_Rate), gRoll_Cases_Rate := 0]
DT[!is.finite(gRoll_Deaths_Rate), gRoll_Deaths_Rate := 0]
# Compare severity to one country.
homecountry = 'Austria'
# Select countries
topInterest <- c('Austria', 'Italy', 'Greece', 'Luxembourg', 'Germany')
neighbour_countries <- c('Switzerland', 'Slovakia', 'Slovenia', 'Czechia', 'Hungary')
other_countries <- c('United_Kingdom', 'Spain', 'France', 'Belgium', 'United_States_of_America', 'Sweden', 'South_Korea')
# Download the shape file from the web and unzip it:
# download.file("http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip", destfile="~/covid/shape_files/world_shape_file.zip")
# system("unzip ~/covid/shape_files/world_shape_file.zip")
world_spdf <- readOGR(dsn='~/covid/shape_files/world_shape_file', layer='TM_WORLD_BORDERS_SIMPL-0.3')
## OGR data source with driver: ESRI Shapefile
## Source: "D:\Documents\covid\shape_files\world_shape_file", layer: "TM_WORLD_BORDERS_SIMPL-0.3"
## with 246 features
## It has 11 fields
## Integer64 fields read as strings: POP2005
# Dataframe-ize map.
world_df <- as.data.table(tidy(world_spdf, region="ISO3"))
## SpP is invalid
# For plottting, the order of rows is super important.
# Merge operations later can change the order, so I need to be able to recover it.
world_df[, ord := 1:nrow(world_df)]
The Covid19 dataset from ECDC comes without geospacial data. The geospacial data available from other sources may not be the most up-to-date with recognised countries and names.
message(paste(sum(!(world_spdf$ISO3 %in% DT$countryCode)),
"countries in the map file do not correspond to an entry in the Covid19 data."))
## 41 countries in the map file do not correspond to an entry in the Covid19 data.
print(world_spdf$ISO3[! world_spdf$ISO3 %in% DT$countryCode])
## [1] "ASM" "COK" "GUF" "FSM" "PRK" "KIR" "MTQ" "MSR" "NIU" "HKG" "MAC" "MYT"
## [13] "ALA" "NFK" "CCK" "ATA" "BVT" "ATF" "HMD" "IOT" "CXR" "UMI" "NRU" "REU"
## [25] "TKL" "TON" "TUV" "TKM" "WLF" "WSM" "GLP" "ANT" "PCN" "PLW" "SPM" "SHN"
## [37] "SJM" "MAF" "BLM" "SGS" "TWN"
outgroup <- unique(DT[!(countryCode %in% world_spdf$ISO3), .(Country, countryCode)])
message(paste(nrow(outgroup),
"countries in the Covid19 data do not correspond to an entity in the map file:"))
## 8 countries in the Covid19 data do not correspond to an entity in the map file:
print(outgroup)
## Country countryCode
## 1: Bonaire, Saint Eustatius and Saba BES
## 2: Curaçao CUW
## 3: Kosovo XKX
## 4: Montserrat MSF
## 5: Sint_Maarten SXM
## 6: South_Sudan SSD
## 7: Taiwan CNG1925
## 8: Wallis_and_Futuna
I think for a global overview map, the missing countries will not make a big difference.
case_col = '#FF0000'
death_col = '#0055FF'
hosp_col = '#555500'
icu_col = '#995500'
test_col = '#005500'
print( data.frame(DaysTracked = length(unique(DT$Date)),
CountriesTracked = length(unique(DT$Country)) ) )
## DaysTracked CountriesTracked
## 1 322 213
print( data.frame( Global_Cases = sum(DT$new_Cases, na.rm = TRUE),
Global_Deaths = sum(DT$new_Deaths, na.rm = TRUE),
Global_Mortality = sum(DT$new_Deaths, na.rm = TRUE) / sum(DT$new_Cases, na.rm = TRUE) ) )
## Global_Cases Global_Deaths Global_Mortality
## 1 54109365 1313828 0.02428097
subDT <- DT[Date==current, , by=Country][, .(Country, norm_roll_Cases)]
# Worst
tail(subDT, n=10)
## Country norm_roll_Cases
## 1: Uruguay 148.19175725
## 2: Uzbekistan 46.72285841
## 3: Vanuatu NA
## 4: Venezuela 64.35022457
## 5: Vietnam 0.44577089
## 6: Wallis_and_Futuna NA
## 7: Western_Sahara 0.00000000
## 8: Yemen 0.06858259
## 9: Zambia 15.34065721
## 10: Zimbabwe 20.07446260
# Best
head(subDT, n=10)
## Country norm_roll_Cases
## 1: Afghanistan 26.33948
## 2: Albania 1219.59442
## 3: Algeria 120.20053
## 4: Andorra 7745.11992
## 5: Angola 30.00757
## 6: Anguilla 0.00000
## 7: Antigua_and_Barbuda 30.89121
## 8: Argentina 1508.73117
## 9: Armenia 3923.95785
## 10: Aruba 677.26460
subDT <- DT[Date==current, , by=Country][, norm_roll_Deaths]
# Worst
tail(subDT, n=10)
## [1] 0.86661846 0.45479745 NA 0.70136485 0.00000000 NA
## [7] 0.00000000 0.03429129 0.05598780 0.47796340
# Best
head(subDT, n=10)
## [1] 1.2880583 19.5638177 2.3459428 0.0000000 0.5027447 0.0000000
## [7] 10.2970705 42.6299961 69.3099568 37.6258113
subDT <- DT[continent=='Europe', max(norm_roll_Cases, na.rm=TRUE), by=Country][order(V1),]
setnames(subDT, c('Country', 'norm_roll_Cases'))
# Worst
tail(subDT, n=10)
## Country norm_roll_Cases
## 1: San_Marino 6095.260
## 2: Liechtenstein 6175.413
## 3: Slovenia 6341.943
## 4: Switzerland 6721.496
## 5: Montenegro 7068.671
## 6: Luxembourg 7804.279
## 7: Czechia 8459.032
## 8: Belgium 9868.518
## 9: Andorra 10331.202
## 10: Holy_See 17177.914
# Best
head(subDT, n=10)
## Country norm_roll_Cases
## 1: Finland 311.1680
## 2: Norway 768.1751
## 3: Belarus 802.6525
## 4: Jersey 983.3389
## 5: Russia 1024.3003
## 6: Azerbaijan 1065.6150
## 7: Estonia 1089.2046
## 8: Latvia 1229.1872
## 9: Albania 1232.5205
## 10: Denmark 1370.1152
case_col = '#FF0000'
death_col = '#0055FF'
hosp_col = '#555500'
icu_col = '#995500'
test_col = '#005500'
subDT <- merge(world_df, DT[Date==current, ], by.x='id', by.y='countryCode', all=TRUE)
setorder(subDT, ord)
Per 1M residents.
p1 <- ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Cases)) +
geom_polygon(colour='black', size=0.2) +
scale_fill_gradient(high=case_col, low='white') +
coord_cartesian(ylim=c(-60,80)) +
theme_void() +
theme(panel.background = element_rect(fill='#BBFFFF'))
p2 <- ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Deaths)) +
geom_polygon(colour='black', size=0.2) +
scale_fill_gradient(high=death_col, low='white') +
coord_cartesian(ylim=c(-60,80)) +
theme_void() +
theme(panel.background = element_rect(fill='#BBFFFF'))
print( p1 / p2 )
ggsave('~/covid/global_roll.png', plot = last_plot(), scale = 1,
width = 30, height = 30, units = "cm", dpi = 300, limitsize = TRUE)
p1 <- ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Cases)) +
geom_polygon(colour='black', size=0.2) +
scale_fill_gradient(high=case_col, low='white') +
coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
theme_void() +
theme(panel.background = element_rect(fill='#BBFFFF'))
p2 <- ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Deaths)) +
geom_polygon(colour='black', size=0.2) +
scale_fill_gradient(high=death_col, low='white') +
coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
theme_void() +
theme(panel.background = element_rect(fill='#BBFFFF'))
print( p1 / p2 )
ggsave('~/covid/eu_roll.png', plot = last_plot(), scale = 1,
width = 21, height = 36, units = "cm", dpi = 300, limitsize = TRUE)
# Testing data seems to lag in date yet a bit more than case data.
subDT <- merge(world_df, DT[Date==current-1, ], by.x='id', by.y='countryCode', all=TRUE)
setorder(subDT, ord)
if(any(is.finite(subDT$norm_roll_Tests) & subDT$norm_roll_Tests > 0)){
print(
ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Tests)) +
geom_polygon(colour='black', size=0.2) +
scale_fill_gradient(high=test_col, low='white') +
coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
theme_void() +
theme(panel.background = element_rect(fill='#BBFFFF'))
)
ggsave('~/covid/ertest.png', plot = last_plot(), scale = 1,
width = 21, height = 18, units = "cm", dpi = 300, limitsize = TRUE)
}
if(any(!is.na(subDT$norm_roll_Hosp) & subDT$norm_roll_Hosp > 0)){
print(
ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Hosp)) +
geom_polygon(colour='black', size=0.2) +
scale_fill_gradient(high=hosp_col, low='white') +
coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
theme_void() +
theme(panel.background = element_rect(fill='#BBFFFF'))
)
ggsave('~/covid/erhosp.png', plot = last_plot(), scale = 1,
width = 21, height = 18, units = "cm", dpi = 300, limitsize = TRUE)
}
if(any(!is.na(subDT$norm_roll_ICU) & subDT$norm_roll_ICU > 0)){
print(
ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_ICU)) +
geom_polygon(colour='black', size=0.2) +
scale_fill_gradient(high=icu_col, low='white') +
coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
theme_void() +
theme(panel.background = element_rect(fill='#BBFFFF'))
)
ggsave('~/covid/ericu.png', plot = last_plot(), scale = 1,
width = 21, height = 18, units = "cm", dpi = 300, limitsize = TRUE)
}
setorder(DT, Country, Date)
Normalized to 10^{6} residents.
DT[Date==current & Country %in% topInterest, .(Country, norm_roll_Cases, norm_roll_Deaths, norm_roll_Tests, norm_roll_Hosp, norm_roll_ICU)]
## Country norm_roll_Cases norm_roll_Deaths norm_roll_Tests norm_roll_Hosp
## 1: Austria 5917.297 31.49420 22527.61 0
## 2: Germany 1578.767 13.87631 18876.15 0
## 3: Greece 1624.396 26.29469 13830.54 0
## 4: Italy 4052.748 58.00242 23306.92 0
## 5: Luxembourg 5557.963 50.49732 110460.44 0
## norm_roll_ICU
## 1: 0.00000
## 2: 0.00000
## 3: 14.26627
## 4: 0.00000
## 5: 0.00000
DT[Date==current & Country %in% topInterest, .(Country, norm_tot_Cases, norm_tot_Deaths, norm_tot_Tests, norm_tot_Hosp, norm_tot_ICU)]
## Country norm_tot_Cases norm_tot_Deaths norm_tot_Tests norm_tot_Hosp
## 1: Austria 21563.252 177.67694 1836123 0.000
## 2: Germany 9317.795 149.09802 2104606 2775.996
## 3: Greece 6496.746 92.96385 1274111 0.000
## 4: Italy 18345.118 731.26793 1975683 0.000
## 5: Luxembourg 41078.753 343.70755 13094373 0.000
## norm_tot_ICU
## 1: 0.0000
## 2: 0.0000
## 3: 584.4508
## 4: 0.0000
## 5: 0.0000
## TODO: Add ICU/Hosp to tidyDT
# Numbers over time
tidyDT <- melt(DT[, .(Date, Country,
norm_new_Cases, norm_new_Deaths, norm_new_Tests, #norm_new_Hosp, norm_new_ICU,
norm_tot_Cases, norm_tot_Deaths, norm_tot_Tests, #norm_tot_Hosp, norm_tot_ICU,
norm_roll_Cases, norm_roll_Deaths, norm_roll_Tests#, norm_roll_Hosp, norm_roll_ICU
)],
id.vars = c('Date', 'Country'), variable.name = 'Type', value.name = 'Normalized_count')
tidyDT[grepl('Death', Type), vsplit := 'Deaths']
tidyDT[grepl('Cases', Type), vsplit := 'Cases']
tidyDT[grepl('Tests', Type), vsplit := 'Tests']
# tidyDT[grepl('Hosp', Type), vsplit := 'Hosp']
# tidyDT[grepl('ICU', Type), vsplit := 'ICU']
tidyDT[, hsplit := sub('_Cases|_Deaths|_Tests|_Hosp|_ICU', '', sub('norm_', '', Type), perl=TRUE)]
setkey(tidyDT, Country)
# Rolling VS Total (exponential-ness)
expDT <- DT[, .(Date, Country, norm_tot_Cases, norm_tot_Deaths, norm_roll_Cases, norm_roll_Deaths)]
tmptot <- melt(expDT, id.vars = c('Date', 'Country'), measure.vars = c('norm_tot_Cases', 'norm_tot_Deaths'), variable.name = 'Type', value.name = 'norm_Total')
tmptot[grepl('Death', Type), vsplit := 'Deaths']
tmptot[!grepl('Death', Type), vsplit := 'Cases']
tmproll <- melt(expDT, id.vars = c('Date', 'Country'), measure.vars = c('norm_roll_Cases', 'norm_roll_Deaths'), variable.name = 'Type', value.name = 'norm_Roll')
tmproll[grepl('Death', Type), vsplit := 'Deaths']
tmproll[!grepl('Death', Type), vsplit := 'Cases']
expDT <- merge(tmproll, tmptot, by=c('Date','Country', 'vsplit'))
setkey(expDT, Country)
# Rate of change
rateDT <- DT[, .(Date, Country, roll_Cases_Rate, roll_Deaths_Rate)]
rateDT <- melt(rateDT, id.vars = c('Date', 'Country'), value.name = 'daily_Rate', variable.name = 'Type')
rateDT[grepl('Death', Type), vsplit := 'Deaths']
rateDT[!grepl('Death', Type), vsplit := 'Cases']
setkey(rateDT, Country)
# Singular plots by information type.
relative_plot <- function(df=expDT, sel_country, title='') {
ggplot(df, aes(x=norm_Total, y=norm_Roll, group=Country, colour=vsplit)) +
facet_grid(vsplit ~ ., scales='free_y') +
geom_line(colour='black', alpha=0.1, size=0.2) +
geom_line(data=df[sel_country,], size=0.8) +
scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
scale_x_log10() +
scale_y_log10() +
coord_cartesian(xlim=c(1,NA), ylim=c(1,NA)) +
annotation_logticks(sides='lb') +
labs(title=title) +
theme_bw() +
theme(panel.grid=element_blank(),
legend.position = 'none')
}
numbers_plot <- function(df=tidyDT, sel_country, title='', linear=FALSE) {
p1 <- ggplot(df, aes(x=Date, y=Normalized_count, group=Country, colour=vsplit)) +
facet_grid(vsplit ~ hsplit, scales = 'free_y') +
# geom_line(colour='#000000', alpha=0.1, size=0.2) + ## Plotting all countries as background for 9 facets
## demands too much memory and crashes.
geom_line(data=df[sel_country,], size=0.8) +
scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
# coord_cartesian(ylim=c(1,NA)) +
labs(title=title, y='', x='') +
theme_bw() +
theme(legend.position = 'none',
panel.grid = element_blank())
if (linear) {
p1
} else {
p1 + scale_y_log10() +
annotation_logticks(sides='lr')
}
}
rate_plot <- function(df=rateDT, sel_country, title='', D=W) {
ggplot(df, aes(x=Date, y=daily_Rate, group=Country, colour=vsplit)) +
facet_grid(vsplit ~ ., scales='free_y') +
geom_hline(yintercept = 1, size=0.2) +
geom_point(data=df[sel_country,], size=0.5) +
geom_smooth(data=df[sel_country,], span=0.3, size=0.8) +
scale_colour_manual(values=c(Deaths=death_col, Cases=case_col)) +
coord_cartesian(ylim=c(0.6, 1.5)) +
labs(title=title, x='', y='change_Ratio') +
theme_bw() +
theme(legend.position = 'none')
}
ratio_plot <- function(df=tidyDT, sel_country, ref_country=homecountry, title='') {
ratioDT <- merge(df[sel_country], df[ref_country,], by=c('Date', 'Type'))
ratioDT[, relative := Normalized_count.x / Normalized_count.y]
ggplot(ratioDT[hsplit.y=='roll',], aes(x=Date, y=relative, colour=vsplit.y)) +
facet_grid(vsplit.y ~ hsplit.y, scales = 'free_y') +
geom_hline(yintercept = 1, size=0.15) +
geom_point(size=0.5) +
geom_smooth(span=0.3, size=0.75) +
scale_y_continuous(trans='log2', breaks=c(1/64, 1/16, 1/4, 1, 4, 16, 64)) +
scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
coord_cartesian(ylim=c(1/33, 33)) +
# annotation_logticks(sides='lr') +
labs(title=title, y='', x='') +
theme_bw() +
theme(legend.position = 'none',
axis.text.y.left = element_text())
}
## TODO : Add Hosp/Test, ICU/Hosp, Death/ICU rates to more_plots()
# Collections of singular plots.
more_plots <- function(numbers=tidyDT, exponential=expDT, rates=rateDT, sel_country, ref_country=homecountry){
cat(sel_country)
p1 <- relative_plot(exponential, sel_country,
title=paste(sel_country, ': ', W, '-day rolling sum, per', P/1e6, 'M residents, as function of total'))
p2 <- numbers_plot(numbers[,], sel_country,
title=paste(sel_country, ': new, total &', W, '-day rolling sum, normalized to', P/1e6, 'M residents'))
p3 <- rate_plot(rates, sel_country,
title=paste(sel_country, ': Daily change rate of', W, '-day roll. sum'))
p4 <- ratio_plot(numbers, sel_country, ref_country,
title=paste(sel_country,':', W, '-day norm.roll. sum, relative to', ref_country))
print( p1 / p2 / p3 / p4 )
}
fewer_plots <- function(numbers=tidyDT[hsplit=='roll',], exponential=expDT, rates=rateDT, sel_country, ref_country=homecountry){
cat(sel_country)
p1 <- relative_plot(exponential, sel_country,
title=paste(sel_country, ': ', W, '-day rolling sum, per', P/1e6, 'M residents, as function of total'))
p2 <- numbers_plot(numbers, sel_country, linear=TRUE,
title=paste(sel_country, ': new, total &', W, '-day rolling sum, normalized to', P/1e6, 'M residents'))
# p3 <- rate_plot(rates, sel_country,
# title=paste(sel_country, ': Daily change rate of', W, '-day roll. sum'))
p4 <- ratio_plot(numbers, sel_country, ref_country,
title=paste(sel_country,':', W, '-day norm.roll. sum, relative to', ref_country))
print( p1 / p2 / p4)
}
# Easier comparison plots. Many countries by info type.
compare_numbers <- function(df=tidyDT, info, countries) {
subdf <- df[Type %in% info & Country %in% countries, ]
m1 <- max(subdf[Date==current, Normalized_count], na.rm = TRUE)
p1 <- ggplot(subdf, aes(x=Date, y=Normalized_count, group=Country, colour=vsplit)) +
geom_hline(yintercept=m1, size=0.25, colour='black', linetype='dotted') +
geom_line() +
geom_point(data=subdf[Normalized_count==m1 & Date==current,], colour='black') +
facet_grid(Country ~ ., ) +
scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
theme_bw() +
theme(legend.position = 'none')
p2 <- p1 +
scale_y_log10() +
annotation_logticks(side='lr') +
coord_cartesian(ylim=c(1,NA))
print((p1 + labs(y=paste(info, '(linear scale)'))) +
(p2 + labs(y=paste(info, '(log scale)'))))
}
compare_exp <- function(df, type, info_x, info_y, countries) {
subdf <- df[vsplit %in% type & Country %in% countries, ]
m1 <- max(subdf[Date==current, c(info_y), with=FALSE], na.rm = TRUE)
m2 <- max(subdf[Date==current, c(info_x), with=FALSE], na.rm = TRUE)
p1 <- ggplot(subdf, aes_string(x=info_x, y=info_y, group='Country', colour='vsplit')) +
geom_hline(yintercept=m1, size=0.25, colour='black', linetype='dotted') +
geom_vline(xintercept=m2, size=0.25, colour='black', linetype='dotted') +
geom_line() +
geom_point(data=subdf[(subdf[[info_x]]==m2 | subdf[[info_y]]==m1) & Date==current,], colour='black') +
facet_grid(Country ~ ., ) +
scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
theme_bw() +
theme(legend.position = 'none')
p2 <- p1 +
scale_y_log10() +
scale_x_log10() +
annotation_logticks(side='lb') +
coord_cartesian(ylim=c(1,NA), xlim=c(1,NA))
print((p1 + labs(y=paste(info_y, type, '(linear scale)'), x=paste(info_x, type, '(linear scale)') )) +
(p2 + labs(y=paste(info_y, type, '(log scale)'), x=paste(info_x, type, '(linear scale)') )))
}
subDT <- melt(unique(DT[, .(Date, gNew_Cases, gNew_Deaths, gTotal_Cases, gTotal_Deaths, gRoll_Cases, gRoll_Deaths)]),
id.vars="Date", variable.name="Type", value.name="Events")
p1 <- ggplot(subDT, aes(x=Date, y=Events, colour=Type, fill=Type)) +
facet_grid( sub('^g', '', sub('_Cases', '', sub('_Deaths', '', subDT$Type))) ~ ., scales = 'free_y') +
geom_line() +
theme_minimal() +
labs(x='', y='') +
theme(legend.position='none')
p2 <- ggplot(subDT, aes(x=Date, y=Events, colour=Type, fill=Type)) +
facet_grid( sub('^g', '', sub('_Cases', '', sub('_Deaths', '', subDT$Type))) ~ ., scales = 'free_y') +
geom_line() +
scale_y_log10() +
labs(x='', y='') +
theme_minimal()
print( p1 + p2 )
subDT <- unique(DT[, .(Date, gRoll_Cases_Rate, gRoll_Deaths_Rate, gMortality)])
p3 <- ggplot(subDT, aes(x=Date, y=gRoll_Cases_Rate)) +
geom_hline(yintercept = 1, size=0.1) +
geom_point(colour=case_col, size=0.5) +
geom_smooth(span=0.2, colour=case_col, size=0.5) +
coord_cartesian(ylim=c(0.8, 1.2)) +
labs(title=paste('Daily change rate of ', W, '-day rolling sum')) +
theme_bw()
p4 <- ggplot(subDT, aes(x=Date, y=gRoll_Deaths_Rate)) +
geom_hline(yintercept = 1, size=0.1) +
geom_point(colour=death_col, size=0.5) +
geom_smooth(span=0.2, colour=death_col, size=0.5) +
coord_cartesian(ylim=c(0.8, 1.2)) +
labs(title='') +
theme_bw()
p5 <- ggplot(subDT, aes(x=Date, y=gMortality)) +
geom_hline(yintercept = 0, size=0.1) +
geom_point(colour=death_col, size=0.5) +
geom_smooth(span=0.3, colour=death_col, size=0.5) +
labs(title='') +
theme_bw()
print( p3 / p4 / p5 )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
for (i in topInterest) {
more_plots(tidyDT, expDT, rateDT, i, homecountry)
ggsave(paste0('~/covid/', i, '_more.png'), plot = last_plot(), scale = 1,
width = 20, height = 40, units = "cm", dpi = 300, limitsize = TRUE)
}
## Austria
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Italy
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Greece
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Luxembourg
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Germany
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
for (i in topInterest) {
fewer_plots(tidyDT[hsplit=='roll',], expDT, rateDT, i, homecountry)
ggsave(paste0('~/covid/', i, '_fewer.png'), plot = last_plot(), scale = 1,
width = 20, height = 40, units = "cm", dpi = 300, limitsize = TRUE)
}
## Austria
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Italy
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Greece
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Luxembourg
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Germany
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
countries <- c(topInterest, other_countries, neighbour_countries)
for (j in c('norm_roll_Cases', 'norm_roll_Deaths', 'norm_new_Tests')) {
compare_numbers(tidyDT, j, countries)
ggsave(paste0('~/covid/', j, '_comp.png'), plot = last_plot(), scale = 1,
width = 30, height = 3 * length(countries), units = "cm", dpi = 300, limitsize = TRUE)
}
for (j in c('Cases', 'Deaths')) {
compare_exp(expDT, j, 'norm_Total', 'norm_Roll', countries)
ggsave(paste0('~/covid/', j, '_comp.png'), plot = last_plot(), scale = 1,
width = 30, height = 3 * length(countries), units = "cm", dpi = 300, limitsize = TRUE)
}
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252
## [2] LC_CTYPE=English_United Kingdom.1252
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rgeos_0.5-5 maptools_1.0-2 rgdal_1.5-18 sp_1.4-4
## [5] broom_0.7.2 data.table_1.13.0 patchwork_1.0.1 ggrepel_0.8.2
## [9] gganimate_1.0.7 ggplot2_3.3.2 lubridate_1.7.9 httr_1.4.2
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.5 pillar_1.4.6 compiler_4.0.3 prettyunits_1.1.1
## [5] tools_4.0.3 progress_1.2.2 digest_0.6.25 nlme_3.1-149
## [9] lattice_0.20-41 evaluate_0.14 lifecycle_0.2.0 tibble_3.0.4
## [13] gtable_0.3.0 mgcv_1.8-33 pkgconfig_2.0.3 rlang_0.4.8
## [17] Matrix_1.2-18 yaml_2.2.1 xfun_0.18 withr_2.3.0
## [21] stringr_1.4.0 dplyr_1.0.2 knitr_1.30 generics_0.0.2
## [25] vctrs_0.3.4 hms_0.5.3 grid_4.0.3 tidyselect_1.1.0
## [29] glue_1.4.2 R6_2.4.1 gifski_0.8.6 foreign_0.8-80
## [33] rmarkdown_2.5 tidyr_1.1.2 farver_2.0.3 purrr_0.3.4
## [37] tweenr_1.0.1 magrittr_1.5 splines_4.0.3 backports_1.1.10
## [41] scales_1.1.1 ellipsis_0.3.1 htmltools_0.5.0 colorspace_1.4-1
## [45] labeling_0.4.2 stringi_1.5.3 munsell_0.5.0 crayon_1.3.4